####################################################################################################
### Title: People Consistently View Elections and Civil Liberties as Key Components of Democracy ###
### Content: Sample demographics (Table S4)                                                      ###
### Date: August 24, 2024                                                                        ###
####################################################################################################

### Set-up ----
## Clean the working environment and set the working directory
## - rm(list = ls()) removes every object that ls() lists in the current
##   environment, so the script starts from an empty workspace. Objects whose
##   names begin with a dot and attached packages are not affected.
## - setwd() makes the "demographics" replication folder the working
##   directory. The datasets read further down use absolute "~/Desktop/..."
##   paths, so this path and those paths must be adjusted together if the
##   replication folder is stored somewhere else.
rm(list = ls())
setwd("~/Desktop/Science_Replication/demographics")

## Install the cjdata package (if not yet installed)
# library(devtools)  # version 2.4.3
# install_github(repo = "yhoriuchi/cjdata")

## Load the required packages
library(tidyverse) # version 2.0.0
library(cjdata)    # version 0.1.0

## Read the raw datasets
## Locations of the raw survey files, keyed by two-letter country code
raw_files <- c(
  US = "~/Desktop/Science_Replication/data_cleaning/raw_US.csv",
  IT = "~/Desktop/Science_Replication/data_cleaning/raw_IT.csv",
  EG = "~/Desktop/Science_Replication/data_cleaning/raw_EG.csv",
  IN = "~/Desktop/Science_Replication/data_cleaning/raw_IN.csv",
  TH = "~/Desktop/Science_Replication/data_cleaning/raw_TH.csv",
  JP = "~/Desktop/Science_Replication/data_cleaning/raw_JP.csv"
)

## One data frame per country; `[[` passes each path as a plain, unnamed string
df_US <- read_Qualtrics(raw_files[["US"]])
df_IT <- read_Qualtrics(raw_files[["IT"]])
df_EG <- read_Qualtrics(raw_files[["EG"]])
df_IN <- read_Qualtrics(raw_files[["IN"]])
df_TH <- read_Qualtrics(raw_files[["TH"]])
df_JP <- read_Qualtrics(raw_files[["JP"]])

### Age ----
## For each country: print the mean and the standard deviation of `age`, add
## the five-bracket variable `age5`, and print the percentage of respondents
## in each bracket.
## Note: mean() and sd() are called without na.rm, so they return NA if `age`
## contains missing values.

## Add `age5`, a character column that bins `age` into five brackets.
##   df: a data frame with a numeric `age` column.
## Returns `df` with `age5` added (or overwritten). An age that satisfies none
## of the conditions (below 18, strictly between two brackets, or missing)
## becomes NA; table() omits NA from the percentages printed below.
## The brackets are defined once here so that all six countries are
## guaranteed to use the same cut-offs.
add_age5 <- function(df) {
  df %>% mutate(age5 = case_when(
    age >= 18 & age <= 29 ~ "18-29",
    age >= 30 & age <= 39 ~ "30-39",
    age >= 40 & age <= 49 ~ "40-49",
    age >= 50 & age <= 59 ~ "50-59",
    age >= 60 ~ "60+"
  ))
}

## The United States
mean(df_US$age)
sd(df_US$age)
df_US <- add_age5(df_US)
table(df_US$age5) %>% prop.table() * 100

## Italy
mean(df_IT$age)
sd(df_IT$age)
df_IT <- add_age5(df_IT)
table(df_IT$age5) %>% prop.table() * 100

## Egypt
mean(df_EG$age)
sd(df_EG$age)
df_EG <- add_age5(df_EG)
table(df_EG$age5) %>% prop.table() * 100

## India
mean(df_IN$age)
sd(df_IN$age)
df_IN <- add_age5(df_IN)
table(df_IN$age5) %>% prop.table() * 100

## Thailand
mean(df_TH$age)
sd(df_TH$age)
df_TH <- add_age5(df_TH)
table(df_TH$age5) %>% prop.table() * 100

## Japan
mean(df_JP$age)
sd(df_JP$age)
df_JP <- add_age5(df_JP)
table(df_JP$age5) %>% prop.table() * 100

### Gender ----
## Recode each country's `gender` answer (stored in the survey language) into
## the common three-level factor `gender3`, then print the percentage of
## respondents per level.

## Map raw gender labels onto the factor Male / Female / Other.
##   gender: vector of raw answers.
##   female, male: the single label that stands for each of these answers.
##   other: character vector of every label that is grouped as "Other".
## Returns a factor with levels "Male", "Female", "Other" (internal codes
## 0, 1, 2 in that order). An answer that matches none of the supplied labels,
## including a missing answer, becomes NA; table() omits NA from the
## percentages printed below.
recode_gender3 <- function(gender, female, male, other) {
  code <- case_when(
    gender == female ~ 1,
    gender == male ~ 0,
    gender %in% other ~ 2
  )
  factor(code, 0:2, c("Male", "Female", "Other"))
}

## The United States
df_US <- df_US %>% mutate(gender3 = recode_gender3(
  gender,
  female = "Female",
  male = "Male",
  other = c(
    "Non-binary/third gender",
    "Not listed (please specify)",
    "Prefer not to say"
  )
))
table(df_US$gender3) %>% prop.table() * 100

## Italy
df_IT <- df_IT %>% mutate(gender3 = recode_gender3(
  gender,
  female = "Femmina",
  male = "Maschio",
  other = c(
    "Non binario/terzo genere",
    "Non elencato (per favore, specifica)",
    "Preferisco non rispondere"
  )
))
table(df_IT$gender3) %>% prop.table() * 100

## Egypt
## NOTE(review): the first "other" label ends with a space inside the quotes.
## It is kept exactly as written; presumably it matches the raw file - confirm.
df_EG <- df_EG %>% mutate(gender3 = recode_gender3(
  gender,
  female = "أنثى",
  male = "ذكر",
  other = c(
    "أفضل عدم الإفصاح ",
    "جنس آخر/ثالث",
    "غير مدرج في القائمة (يرجى التحديد)"
  )
))
table(df_EG$gender3) %>% prop.table() * 100

## India
df_IN <- df_IN %>% mutate(gender3 = recode_gender3(
  gender,
  female = "महिला",
  male = "पुरुष",
  other = c(
    "कुछ नहीं कहना चाहूँगा",
    "नॉन-बाइनरी/तीसरा लिंग",
    "सूचीबद्ध नहीं (कृपया निर्दिष्ट करें)"
  )
))
table(df_IN$gender3) %>% prop.table() * 100

## Thailand
df_TH <- df_TH %>% mutate(gender3 = recode_gender3(
  gender,
  female = "หญิง",
  male = "ชาย",
  other = c(
    "นอน-ไบนารี / เพศที่สาม",
    "ไม่ประสงค์ที่จะระบุ",
    "อื่น ๆ (โปรดระบุ)"
  )
))
table(df_TH$gender3) %>% prop.table() * 100

## Japan
df_JP <- df_JP %>% mutate(gender3 = recode_gender3(
  gender,
  female = "女性",
  male = "男性",
  other = c(
    "ノンバイナリー／第三の性",
    "答えたくない",
    "記載なし（具体的にご記入ください）"
  )
))
table(df_JP$gender3) %>% prop.table() * 100

### Education ----
## Collapse each country's `edu` answer (stored in the survey language) into
## the two-level factor `edu_bin`, then print the percentage of respondents
## per level.

## Map raw education labels onto the factor No College / College.
##   edu: vector of raw answers.
##   no_college: character vector of labels coded as "No College".
##   college: character vector of labels coded as "College".
## Returns a factor with levels "No College", "College" (internal codes 0, 1).
## An answer that appears in neither vector, including a missing answer,
## becomes NA; table() omits NA from the percentages printed below.
recode_edu_bin <- function(edu, no_college, college) {
  code <- case_when(
    edu %in% no_college ~ 0,
    edu %in% college ~ 1
  )
  factor(code, 0:1, c("No College", "College"))
}

## The United States
df_US <- df_US %>% mutate(edu_bin = recode_edu_bin(
  edu,
  no_college = c(
    "Less than high school",
    "High school or equivalent",
    "Some college, no degree",
    "Associate's Degree (AA)"
  ),
  college = c(
    "Bachelor's Degree (BA, BS, BBA)",
    "Advanced degree (MA, MS, MBA, PhD, JD, MD, etc.)"
  )
))
table(df_US$edu_bin) %>% prop.table() * 100

## Italy
df_IT <- df_IT %>% mutate(edu_bin = recode_edu_bin(
  edu,
  no_college = c(
    "Inferiore al diploma superiore",
    "Diploma superiore o equivalente",
    "Diploma universitario",
    "Studi universitari, senza conseguire alcuna laurea"
  ),
  college = c(
    "Laurea Triennale",
    "Laurea Specialistica/Master/Dottorato di Ricerca (MA, MS, MBA, PhD, JD, MD, etc.)"
  )
))
table(df_IT$edu_bin) %>% prop.table() * 100

## Egypt
df_EG <- df_EG %>% mutate(edu_bin = recode_edu_bin(
  edu,
  no_college = c(
    "أقل من الثانوية",
    "ما يعادل المدرسة الثانوية",
    "درجة دبلوم جامعي",
    "كلية ما، لا يوجد درجة بعد الكلية"
  ),
  college = c(
    "درجة البكالوريوس / درجة أربع سنوات (بكالوريوس تجارة، بكالوريوس علوم، بكالوريوس إدارة أعمال)",
    "درجة متقدمة (ماجستير، ماجستير في الآداب، ماجستير في العلوم، ماجستير في إدارة الأعمال، دكتوراه، دكتوراه في القانون، دكتوراه في الطب، وما إلى ذلك.)"
  )
))
table(df_EG$edu_bin) %>% prop.table() * 100

## India
df_IN <- df_IN %>% mutate(edu_bin = recode_edu_bin(
  edu,
  no_college = c(
    "हाई स्कूल से कम",
    "हाई स्कूल के समकक्ष",
    "असोशीएट डिग्री (AA)",
    "कोई कॉलेज, कोई डिग्री नहीं"
  ),
  college = c(
    "स्नातक की डिग्री/चार-साल की डिग्री (BB, BS, BBA)",
    "एडवांस्ड डिग्री (MA, MS, MBA, PHD, JD, MD, आदि)"
  )
))
table(df_IN$edu_bin) %>% prop.table() * 100

## Thailand
df_TH <- df_TH %>% mutate(edu_bin = recode_edu_bin(
  edu,
  no_college = c(
    "ต่ำกว่ามัธยมปลาย",
    "เทียบเท่ามัธยมปลาย",
    "ระดับอนุปริญญา (อ.ศศ.)",
    "วิทยาลัย ไม่ใช่ระดับปริญญา"
  ),
  college = c(
    "ปริญญาตรี/ปริญญาสี่ปี (พธ.บ. วท.บ. บธ.บ.)",
    "ปริญญาโท-เอก (ศศ.ด. วท.ม. บธ.ม. ปร.ด. น.บ. พ.บ. ฯลฯ)"
  )
))
table(df_TH$edu_bin) %>% prop.table() * 100

## Japan
df_JP <- df_JP %>% mutate(edu_bin = recode_edu_bin(
  edu,
  no_college = c(
    "中学校卒業以下",
    "高校卒業または同程度の学歴",
    "短期大学士",
    "一部の大学、学位なし"
  ),
  college = c(
    "学士号／4年制学士",
    "上級学位（修士号、博士号、法務博士など）"
  )
))
table(df_JP$edu_bin) %>% prop.table() * 100

### Importance of democracy ----
## Print the mean of `democracy_impt_1` for each country. Missing responses
## are dropped before averaging (na.rm = TRUE). The reserved constant TRUE is
## used rather than `T`, which is an ordinary variable that can be reassigned.

## The United States
mean(df_US$democracy_impt_1, na.rm = TRUE)

## Italy
mean(df_IT$democracy_impt_1, na.rm = TRUE)

## Egypt
mean(df_EG$democracy_impt_1, na.rm = TRUE)

## India
mean(df_IN$democracy_impt_1, na.rm = TRUE)

## Thailand
mean(df_TH$democracy_impt_1, na.rm = TRUE)

## Japan
mean(df_JP$democracy_impt_1, na.rm = TRUE)